/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/ganesh/gl/GrGLOpsRenderPass.h"

#include "src/gpu/ganesh/GrProgramInfo.h"
#include "src/gpu/ganesh/GrRenderTarget.h"
#include "src/gpu/ganesh/gl/GrGLProgram.h"

#ifdef SK_DEBUG
#include "include/gpu/GrDirectContext.h"
#include "src/gpu/ganesh/GrDirectContextPriv.h"
#endif

#define GL_CALL(X) GR_GL_CALL(fGpu->glInterface(), X)

void GrGLOpsRenderPass::set(GrRenderTarget *rt, bool useMSAASurface, const SkIRect &contentBounds,
    GrSurfaceOrigin origin, const LoadAndStoreInfo &colorInfo, const StencilLoadAndStoreInfo &stencilInfo)
{
    SkASSERT(fGpu);
    SkASSERT(!fRenderTarget);
    SkASSERT(fGpu == rt->getContext()->priv().getGpu());

    this->INHERITED::set(rt, origin);
    fUseMultisampleFBO = useMSAASurface;
    fContentBounds = contentBounds;
    fColorLoadAndStoreInfo = colorInfo;
    fStencilLoadAndStoreInfo = stencilInfo;
}

GrNativeRect GrGLOpsRenderPass::dmsaaLoadStoreBounds() const
{
    if (fGpu->glCaps().framebufferResolvesMustBeFullSize()) {
        // If frambeffer resolves have to be full size, then resolve the entire render target during
        // load and store both, even if we will be doing so with a draw. We do this because we will
        // have no other choice than to do a full size resolve at the end of the render pass, so the
        // full DMSAA attachment needs to have valid content.
        return GrNativeRect::MakeRelativeTo(fOrigin, fRenderTarget->height(),
            SkIRect::MakeSize(fRenderTarget->dimensions()));
    } else {
        return GrNativeRect::MakeRelativeTo(fOrigin, fRenderTarget->height(), fContentBounds);
    }
}

void GrGLOpsRenderPass::onBegin()
{
    auto glRT = static_cast<GrGLRenderTarget *>(fRenderTarget);
    if (fUseMultisampleFBO && fColorLoadAndStoreInfo.fLoadOp == GrLoadOp::kLoad && glRT->hasDynamicMSAAAttachment()) {
        // Load the single sample fbo into the dmsaa attachment.
        if (fGpu->glCaps().canResolveSingleToMSAA()) {
            fGpu->resolveRenderFBOs(glRT, this->dmsaaLoadStoreBounds().asSkIRect(),
                GrGLGpu::ResolveDirection::kSingleToMSAA);
        } else {
            fGpu->drawSingleIntoMSAAFBO(glRT, this->dmsaaLoadStoreBounds().asSkIRect());
        }
    }

    fGpu->beginCommandBuffer(glRT, fUseMultisampleFBO, fContentBounds, fOrigin, fColorLoadAndStoreInfo,
        fStencilLoadAndStoreInfo);
}

void GrGLOpsRenderPass::onEnd()
{
    auto glRT = static_cast<GrGLRenderTarget *>(fRenderTarget);
    fGpu->endCommandBuffer(glRT, fUseMultisampleFBO, fColorLoadAndStoreInfo, fStencilLoadAndStoreInfo);

    if (fUseMultisampleFBO && fColorLoadAndStoreInfo.fStoreOp == GrStoreOp::kStore &&
        glRT->hasDynamicMSAAAttachment()) {
        // Blit the msaa attachment into the single sample fbo.
        fGpu->resolveRenderFBOs(glRT, this->dmsaaLoadStoreBounds().asSkIRect(),
            GrGLGpu::ResolveDirection::kMSAAToSingle, true /* invalidateReadBufferAfterBlit */);
    }
}

bool GrGLOpsRenderPass::onBindPipeline(const GrProgramInfo &programInfo, const SkRect &drawBounds)
{
    fPrimitiveType = programInfo.primitiveType();
    return fGpu->flushGLState(fRenderTarget, fUseMultisampleFBO, programInfo);
}

void GrGLOpsRenderPass::onSetScissorRect(const SkIRect &scissor)
{
    fGpu->flushScissorRect(scissor, fRenderTarget->height(), fOrigin);
}

bool GrGLOpsRenderPass::onBindTextures(const GrGeometryProcessor &geomProc,
    const GrSurfaceProxy * const geomProcTextures[], const GrPipeline &pipeline)
{
    GrGLProgram *program = fGpu->currentProgram();
    SkASSERT(program);
    program->bindTextures(geomProc, geomProcTextures, pipeline);
    return true;
}

void GrGLOpsRenderPass::onBindBuffers(sk_sp<const GrBuffer> indexBuffer, sk_sp<const GrBuffer> instanceBuffer,
    sk_sp<const GrBuffer> vertexBuffer, GrPrimitiveRestart primitiveRestart)
{
    SkASSERT((primitiveRestart == GrPrimitiveRestart::kNo) || indexBuffer);
    GrGLProgram *program = fGpu->currentProgram();
    SkASSERT(program);

#ifdef SK_DEBUG
    fDidBindInstanceBuffer = false;
    fDidBindVertexBuffer = false;
#endif

    int numAttribs = program->numVertexAttributes() + program->numInstanceAttributes();
    fAttribArrayState = fGpu->bindInternalVertexArray(indexBuffer.get(), numAttribs, primitiveRestart);

    if (indexBuffer) {
        if (indexBuffer->isCpuBuffer()) {
            auto *cpuIndexBuffer = static_cast<const GrCpuBuffer *>(indexBuffer.get());
            fIndexPointer = reinterpret_cast<const uint16_t *>(cpuIndexBuffer->data());
        } else {
            fIndexPointer = nullptr;
        }
    }

    // If this platform does not support baseInstance, defer binding of the instance buffer.
    if (fGpu->glCaps().baseVertexBaseInstanceSupport()) {
        this->bindInstanceBuffer(instanceBuffer.get(), 0);
        SkDEBUGCODE(fDidBindInstanceBuffer = true;)
    }
    fActiveInstanceBuffer = std::move(instanceBuffer);

    // We differ binding the vertex buffer for one of two situations:
    // 1) This platform does not support baseVertex with indexed draws.
    // 2) There is a driver bug affecting glDrawArrays.
    if ((indexBuffer && fGpu->glCaps().baseVertexBaseInstanceSupport()) ||
        (!indexBuffer && !fGpu->glCaps().drawArraysBaseVertexIsBroken())) {
        this->bindVertexBuffer(vertexBuffer.get(), 0);
        SkDEBUGCODE(fDidBindVertexBuffer = true;)
    }
    fActiveVertexBuffer = std::move(vertexBuffer);
    fActiveIndexBuffer = std::move(indexBuffer);
}

void GrGLOpsRenderPass::bindInstanceBuffer(const GrBuffer *instanceBuffer, int baseInstance)
{
    GrGLProgram *program = fGpu->currentProgram();
    SkASSERT(program);
    if (int instanceStride = program->instanceStride()) {
        SkASSERT(instanceBuffer);
        SkASSERT(instanceBuffer->isCpuBuffer() || !static_cast<const GrGpuBuffer *>(instanceBuffer)->isMapped());
        size_t bufferOffset = baseInstance * static_cast<size_t>(instanceStride);
        int attribIdx = program->numVertexAttributes();
        for (int i = 0; i < program->numInstanceAttributes(); ++i, ++attribIdx) {
            const auto &attrib = program->instanceAttribute(i);
            static constexpr int kDivisor = 1;
            fAttribArrayState->set(fGpu, attrib.fLocation, instanceBuffer, attrib.fCPUType, attrib.fGPUType,
                instanceStride, bufferOffset + attrib.fOffset, kDivisor);
        }
    }
}

void GrGLOpsRenderPass::bindVertexBuffer(const GrBuffer *vertexBuffer, int baseVertex)
{
    GrGLProgram *program = fGpu->currentProgram();
    SkASSERT(program);
    if (int vertexStride = program->vertexStride()) {
        SkASSERT(vertexBuffer);
        SkASSERT(vertexBuffer->isCpuBuffer() || !static_cast<const GrGpuBuffer *>(vertexBuffer)->isMapped());
        size_t bufferOffset = baseVertex * static_cast<size_t>(vertexStride);
        for (int i = 0; i < program->numVertexAttributes(); ++i) {
            const auto &attrib = program->vertexAttribute(i);
            static constexpr int kDivisor = 0;
            fAttribArrayState->set(fGpu, attrib.fLocation, vertexBuffer, attrib.fCPUType, attrib.fGPUType, vertexStride,
                bufferOffset + attrib.fOffset, kDivisor);
        }
    }
}

void GrGLOpsRenderPass::onDraw(int vertexCount, int baseVertex)
{
    SkASSERT(fDidBindVertexBuffer || fGpu->glCaps().drawArraysBaseVertexIsBroken());
    GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
    if (fGpu->glCaps().drawArraysBaseVertexIsBroken()) {
        this->bindVertexBuffer(fActiveVertexBuffer.get(), baseVertex);
        baseVertex = 0;
    }
    GL_CALL(DrawArrays(glPrimType, baseVertex, vertexCount));
    fGpu->didDrawTo(fRenderTarget);
}

void GrGLOpsRenderPass::onDrawIndexed(int indexCount, int baseIndex, uint16_t minIndexValue, uint16_t maxIndexValue,
    int baseVertex)
{
    GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
    if (fGpu->glCaps().baseVertexBaseInstanceSupport()) {
        SkASSERT(fGpu->glCaps().drawInstancedSupport());
        SkASSERT(fDidBindVertexBuffer);
        if (baseVertex != 0) {
            GL_CALL(DrawElementsInstancedBaseVertexBaseInstance(glPrimType, indexCount, GR_GL_UNSIGNED_SHORT,
                this->offsetForBaseIndex(baseIndex), 1, baseVertex, 0));
            return;
        }
    } else {
        this->bindVertexBuffer(fActiveVertexBuffer.get(), baseVertex);
    }

    if (fGpu->glCaps().drawRangeElementsSupport()) {
        GL_CALL(DrawRangeElements(glPrimType, minIndexValue, maxIndexValue, indexCount, GR_GL_UNSIGNED_SHORT,
            this->offsetForBaseIndex(baseIndex)));
    } else {
        GL_CALL(DrawElements(glPrimType, indexCount, GR_GL_UNSIGNED_SHORT, this->offsetForBaseIndex(baseIndex)));
    }
    fGpu->didDrawTo(fRenderTarget);
}

void GrGLOpsRenderPass::onDrawInstanced(int instanceCount, int baseInstance, int vertexCount, int baseVertex)
{
    SkASSERT(fDidBindVertexBuffer || fGpu->glCaps().drawArraysBaseVertexIsBroken());
    if (fGpu->glCaps().drawArraysBaseVertexIsBroken()) {
        // We weren't able to bind the vertex buffer during onBindBuffers because of a driver bug
        // affecting glDrawArrays.
        this->bindVertexBuffer(fActiveVertexBuffer.get(), 0);
    }
    int maxInstances = fGpu->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount);
    for (int i = 0; i < instanceCount; i += maxInstances) {
        GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
        int instanceCountForDraw = std::min(instanceCount - i, maxInstances);
        int baseInstanceForDraw = baseInstance + i;
        if (fGpu->glCaps().baseVertexBaseInstanceSupport()) {
            SkASSERT(fDidBindInstanceBuffer);
            GL_CALL(DrawArraysInstancedBaseInstance(glPrimType, baseVertex, vertexCount, instanceCountForDraw,
                baseInstanceForDraw));
        } else {
            this->bindInstanceBuffer(fActiveInstanceBuffer.get(), baseInstanceForDraw);
            GL_CALL(DrawArraysInstanced(glPrimType, baseVertex, vertexCount, instanceCountForDraw));
        }
    }
    fGpu->didDrawTo(fRenderTarget);
}

void GrGLOpsRenderPass::onDrawIndexedInstanced(int indexCount, int baseIndex, int instanceCount, int baseInstance,
    int baseVertex)
{
    int maxInstances = fGpu->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount);
    for (int i = 0; i < instanceCount; i += maxInstances) {
        GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
        int instanceCountForDraw = std::min(instanceCount - i, maxInstances);
        int baseInstanceForDraw = baseInstance + i;
        if (fGpu->glCaps().baseVertexBaseInstanceSupport()) {
            SkASSERT(fDidBindInstanceBuffer);
            SkASSERT(fDidBindVertexBuffer);
            GL_CALL(DrawElementsInstancedBaseVertexBaseInstance(glPrimType, indexCount, GR_GL_UNSIGNED_SHORT,
                this->offsetForBaseIndex(baseIndex), instanceCountForDraw, baseVertex, baseInstanceForDraw));
        } else {
            this->bindInstanceBuffer(fActiveInstanceBuffer.get(), baseInstanceForDraw);
            this->bindVertexBuffer(fActiveVertexBuffer.get(), baseVertex);
            GL_CALL(DrawElementsInstanced(glPrimType, indexCount, GR_GL_UNSIGNED_SHORT,
                this->offsetForBaseIndex(baseIndex), instanceCountForDraw));
        }
    }
    fGpu->didDrawTo(fRenderTarget);
}

static const void *buffer_offset_to_gl_address(const GrBuffer *drawIndirectBuffer, size_t offset)
{
    if (drawIndirectBuffer->isCpuBuffer()) {
        return static_cast<const GrCpuBuffer *>(drawIndirectBuffer)->data() + offset;
    } else {
        return (offset) ? reinterpret_cast<const void *>(offset) : nullptr;
    }
}

void GrGLOpsRenderPass::onDrawIndirect(const GrBuffer *drawIndirectBuffer, size_t offset, int drawCount)
{
    using MultiDrawType = GrGLCaps::MultiDrawType;

    SkASSERT(fGpu->caps()->nativeDrawIndirectSupport());
    SkASSERT(fGpu->glCaps().baseVertexBaseInstanceSupport());
    SkASSERT(fDidBindVertexBuffer || fGpu->glCaps().drawArraysBaseVertexIsBroken());

    if (fGpu->glCaps().drawArraysBaseVertexIsBroken()) {
        // We weren't able to bind the vertex buffer during onBindBuffers because of a driver bug
        // affecting glDrawArrays.
        this->bindVertexBuffer(fActiveVertexBuffer.get(), 0);
    }

    if (fGpu->glCaps().multiDrawType() == MultiDrawType::kANGLEOrWebGL) {
        // ANGLE and WebGL don't support glDrawElementsIndirect. We draw everything as a multi draw.
        this->multiDrawArraysANGLEOrWebGL(drawIndirectBuffer, offset, drawCount);
        return;
    }

    fGpu->bindBuffer(GrGpuBufferType::kDrawIndirect, drawIndirectBuffer);

    if (drawCount > 1 && fGpu->glCaps().multiDrawType() == MultiDrawType::kMultiDrawIndirect) {
        GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
        GL_CALL(MultiDrawArraysIndirect(glPrimType, buffer_offset_to_gl_address(drawIndirectBuffer, offset), drawCount,
            sizeof(GrDrawIndirectCommand)));
        return;
    }

    for (int i = 0; i < drawCount; ++i) {
        GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
        GL_CALL(DrawArraysIndirect(glPrimType, buffer_offset_to_gl_address(drawIndirectBuffer, offset)));
        offset += sizeof(GrDrawIndirectCommand);
    }
    fGpu->didDrawTo(fRenderTarget);
}

void GrGLOpsRenderPass::multiDrawArraysANGLEOrWebGL(const GrBuffer *drawIndirectBuffer, size_t offset, int drawCount)
{
    SkASSERT(fGpu->glCaps().multiDrawType() == GrGLCaps::MultiDrawType::kANGLEOrWebGL);
    SkASSERT(drawIndirectBuffer->isCpuBuffer());

    constexpr static int kMaxDrawCountPerBatch = 128;
    GrGLint fFirsts[kMaxDrawCountPerBatch];
    GrGLsizei fCounts[kMaxDrawCountPerBatch];
    GrGLsizei fInstanceCounts[kMaxDrawCountPerBatch];
    GrGLuint fBaseInstances[kMaxDrawCountPerBatch];

    GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
    auto *cpuBuffer = static_cast<const GrCpuBuffer *>(drawIndirectBuffer);
    auto *cmds = reinterpret_cast<const GrDrawIndirectCommand *>(cpuBuffer->data() + offset);

    while (drawCount) {
        int countInBatch = std::min(drawCount, kMaxDrawCountPerBatch);
        for (int i = 0; i < countInBatch; ++i) {
            auto [vertexCount, instanceCount, baseVertex, baseInstance] = cmds[i];
            fFirsts[i] = baseVertex;
            fCounts[i] = vertexCount;
            fInstanceCounts[i] = instanceCount;
            fBaseInstances[i] = baseInstance;
        }
        if (countInBatch == 1) {
            GL_CALL(DrawArraysInstancedBaseInstance(glPrimType, fFirsts[0], fCounts[0], fInstanceCounts[0],
                fBaseInstances[0]));
        } else {
            GL_CALL(MultiDrawArraysInstancedBaseInstance(glPrimType, fFirsts, fCounts, fInstanceCounts, fBaseInstances,
                countInBatch));
        }
        drawCount -= countInBatch;
        cmds += countInBatch;
    }
    fGpu->didDrawTo(fRenderTarget);
}

void GrGLOpsRenderPass::onDrawIndexedIndirect(const GrBuffer *drawIndirectBuffer, size_t offset, int drawCount)
{
    using MultiDrawType = GrGLCaps::MultiDrawType;

    SkASSERT(fGpu->caps()->nativeDrawIndirectSupport());
    SkASSERT(!fGpu->caps()->nativeDrawIndexedIndirectIsBroken());
    SkASSERT(fGpu->glCaps().baseVertexBaseInstanceSupport());
    // The vertex buffer should have already gotten bound (as opposed us stashing it away during
    // onBindBuffers and not expecting to bind it until this point).
    SkASSERT(fDidBindVertexBuffer);

    if (fGpu->glCaps().multiDrawType() == MultiDrawType::kANGLEOrWebGL) {
        // ANGLE and WebGL don't support glDrawElementsIndirect. We draw everything as a multi draw.
        this->multiDrawElementsANGLEOrWebGL(drawIndirectBuffer, offset, drawCount);
        return;
    }

    fGpu->bindBuffer(GrGpuBufferType::kDrawIndirect, drawIndirectBuffer);

    if (drawCount > 1 && fGpu->glCaps().multiDrawType() == MultiDrawType::kMultiDrawIndirect) {
        GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
        GL_CALL(MultiDrawElementsIndirect(glPrimType, GR_GL_UNSIGNED_SHORT,
            buffer_offset_to_gl_address(drawIndirectBuffer, offset), drawCount, sizeof(GrDrawIndexedIndirectCommand)));
        return;
    }

    for (int i = 0; i < drawCount; ++i) {
        GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
        GL_CALL(DrawElementsIndirect(glPrimType, GR_GL_UNSIGNED_SHORT,
            buffer_offset_to_gl_address(drawIndirectBuffer, offset)));
        offset += sizeof(GrDrawIndexedIndirectCommand);
    }
    fGpu->didDrawTo(fRenderTarget);
}

void GrGLOpsRenderPass::multiDrawElementsANGLEOrWebGL(const GrBuffer *drawIndirectBuffer, size_t offset, int drawCount)
{
    SkASSERT(fGpu->glCaps().multiDrawType() == GrGLCaps::MultiDrawType::kANGLEOrWebGL);
    SkASSERT(drawIndirectBuffer->isCpuBuffer());

    constexpr static int kMaxDrawCountPerBatch = 128;
    GrGLint fCounts[kMaxDrawCountPerBatch];
    const void *fIndices[kMaxDrawCountPerBatch];
    GrGLsizei fInstanceCounts[kMaxDrawCountPerBatch];
    GrGLint fBaseVertices[kMaxDrawCountPerBatch];
    GrGLuint fBaseInstances[kMaxDrawCountPerBatch];

    GrGLenum glPrimType = fGpu->prepareToDraw(fPrimitiveType);
    auto *cpuBuffer = static_cast<const GrCpuBuffer *>(drawIndirectBuffer);
    auto *cmds = reinterpret_cast<const GrDrawIndexedIndirectCommand *>(cpuBuffer->data() + offset);

    while (drawCount) {
        int countInBatch = std::min(drawCount, kMaxDrawCountPerBatch);
        for (int i = 0; i < countInBatch; ++i) {
            auto [indexCount, instanceCount, baseIndex, baseVertex, baseInstance] = cmds[i];
            fCounts[i] = indexCount;
            fIndices[i] = this->offsetForBaseIndex(baseIndex);
            fInstanceCounts[i] = instanceCount;
            fBaseVertices[i] = baseVertex;
            fBaseInstances[i] = baseInstance;
        }
        if (countInBatch == 1) {
            GL_CALL(DrawElementsInstancedBaseVertexBaseInstance(glPrimType, fCounts[0], GR_GL_UNSIGNED_SHORT,
                fIndices[0], fInstanceCounts[0], fBaseVertices[0], fBaseInstances[0]));
        } else {
            GL_CALL(MultiDrawElementsInstancedBaseVertexBaseInstance(glPrimType, fCounts, GR_GL_UNSIGNED_SHORT,
                fIndices, fInstanceCounts, fBaseVertices, fBaseInstances, countInBatch));
        }
        drawCount -= countInBatch;
        cmds += countInBatch;
    }
    fGpu->didDrawTo(fRenderTarget);
}

void GrGLOpsRenderPass::onClear(const GrScissorState &scissor, std::array<float, 4> color)
{
    fGpu->clear(scissor, color, fRenderTarget, fUseMultisampleFBO, fOrigin);
}

void GrGLOpsRenderPass::onClearStencilClip(const GrScissorState &scissor, bool insideStencilMask)
{
    fGpu->clearStencilClip(scissor, insideStencilMask, fRenderTarget, fUseMultisampleFBO, fOrigin);
}
